import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import seaborn as sns
import numpy as np
# Load the twenty per-session exports (';'-delimited). The target names are
# unpacked in the same order as the file list below.
(
    SessionA, SessionB, SessionC, SessionD, SessionE,
    SessionF, SessionG, SessionH, SessionI, SessionJ,
    SessionK, SessionL, SessionM, SessionN, SessionO,
    SessionP, SessionQ, SessionR, SessionS, SessionT,
) = [
    pd.read_csv(csv_path, delimiter=';')
    for csv_path in (
        'SessionA.csv', 'SessionB.csv', 'SessionC.csv', 'SessionD.csv',
        'SessionE.csv', 'SessionF.csv', 'SessionG.csv', 'SessionH.csv',
        'SessionI.csv', 'SessionJ.csv', 'SessionK.csv', 'SessionL.csv',
        'SessionM.csv', 'SessionN.csv', 'SessionO.csv', 'SessionP.csv',
        'SessionQ.csv', 'SessionR.csv', 'SessionS.csv', 'SessionT.csv',
    )
]
# Candidate participant numbers "0" .. "30", as strings.
dep = list(map(str, range(31)))


def findP(x):
    """Return ``"Participant<N>"`` for the largest number N in ``dep`` found in ``x``.

    The candidates are tried from "30" down to "0", so a two-digit number
    wins over the single digits it contains (e.g. "12" before "2" or "1").
    Returns ``None`` when ``x`` contains none of the candidates.
    """
    hits = ("Participant" + number for number in dep[::-1] if number in x)
    return next(hits, None)
#Data Clean - Organised for Plotting
def dfClean(df):
    """Reshape one raw session table into a long table ready for plotting.

    Steps:
      * drop every column whose label starts with ``Arousal`` or ``Valence``
        and the ``'Sec↓/Measure →'`` column;
      * stack the remaining columns so each row holds one value together
        with the label of the column it came from;
      * derive ``Participants`` from the number in that label via ``findP``;
        labels without a number are assigned ``'Participant30'``;
      * reduce the labels to the bare mood names ``Sym/Hap``, ``Foc/Ang``,
        ``Surprise`` and ``Ind/Sad`` (a leading ``Dis/Sad`` is renamed to
        ``Ind/Sad``).

    Parameters:
        df: raw session DataFrame as loaded with ``pd.read_csv``.

    Returns:
        DataFrame whose index is the original row index (named ``Second``)
        with the columns ``Moods``, ``Values`` and ``Participants``.

    Raises:
        KeyError: if the ``'Sec↓/Measure →'`` column is missing.
    """
    # One prefix match covers both the plain and the numbered
    # Arousal/Valence columns.
    unwanted = list(df.filter(regex=r'\A(?:Arousal|Valence)'))
    df = df[df.columns.drop(unwanted)]
    df = df.drop('Sec↓/Measure →', axis=1)

    # Wide -> long: the former column label ends up in 'level_1'.
    df = df.stack().to_frame()
    df.reset_index(level=1, inplace=True)

    # The participant number must be read before the suffix is stripped.
    df['Participants'] = df['level_1'].apply(findP)

    # Normalise only the label column. The numeric suffix is presumably the
    # one pandas appends to duplicate headers (".1", ".2", ...) - TODO confirm
    # against the CSV header. The pattern is anchored at both ends so the whole
    # suffix is removed in one pass, whatever its number of digits.
    labels = df['level_1'].replace(regex=r'\ADis/Sad', value='Ind/Sad')
    for mood in ('Sym/Hap', 'Foc/Ang', 'Surprise', 'Ind/Sad'):
        labels = labels.replace(regex=r'\A' + mood + r'\.?[0-9]+\Z', value=mood)
    df['level_1'] = labels

    # NOTE(review): un-numbered columns are attributed to participant 30, as in
    # the original code - confirm this matches the study's numbering.
    df.loc[df['Participants'].isnull(), 'Participants'] = 'Participant30'
    df = df.rename_axis('Second')
    df.columns = ["Moods", "Values", "Participants"]
    return df
# Replace every raw session table with its cleaned, long-format version.
(
    SessionA, SessionB, SessionC, SessionD, SessionE,
    SessionF, SessionG, SessionH, SessionI, SessionJ,
    SessionK, SessionL, SessionM, SessionN, SessionO,
    SessionP, SessionQ, SessionR, SessionS, SessionT,
) = [
    dfClean(raw_session)
    for raw_session in (
        SessionA, SessionB, SessionC, SessionD, SessionE,
        SessionF, SessionG, SessionH, SessionI, SessionJ,
        SessionK, SessionL, SessionM, SessionN, SessionO,
        SessionP, SessionQ, SessionR, SessionS, SessionT,
    )
]
#Function Data Viz (All)
def showAll(df):
    """Draw a two-panel overview figure for one cleaned session table.

    Top panel: horizontal box plot of ``Values`` per ``Moods`` category with
    whiskers spanning the full data range (percentiles 0 and 100), overlaid
    with a strip plot of the individual values.
    Bottom panel: line plot of ``Values`` against ``Second``, one line per mood.

    Parameters:
        df: DataFrame with ``Moods`` and ``Values`` columns and an index
            named ``Second`` (the shape produced by ``dfClean``).
    """
    sns.set_theme(style="ticks")
    _fig, (spread_ax, trend_ax) = plt.subplots(2, figsize=(12, 8))

    # Top panel: distribution per mood.
    spread_ax.set_xscale("linear")
    per_mood = dict(x="Values", y="Moods", data=df, ax=spread_ax)
    sns.boxplot(whis=[0, 100], width=.7, palette="vlag", **per_mood)
    sns.stripplot(size=4, color=".3", linewidth=0, **per_mood)
    spread_ax.xaxis.grid(True)
    spread_ax.set(ylabel="")

    # Bottom panel: values over time.
    sns.lineplot(x='Second', y='Values', hue='Moods', data=df, ax=trend_ax)
    trend_ax.yaxis.grid(True)
#Function Data Viz (Order Comparison - Box Plots for two participants)
def checkOrder(df1, df2):
    """Draw two stacked box-plot panels, ``df1`` on top and ``df2`` below.

    Each panel shows ``Values`` per ``Moods`` category as a horizontal box
    plot with whiskers spanning the full data range, overlaid with a strip
    plot of the individual values.

    Parameters:
        df1: DataFrame with ``Moods`` and ``Values`` columns (top panel).
        df2: DataFrame with ``Moods`` and ``Values`` columns (bottom panel).
    """
    sns.set_theme(style="ticks")
    _fig, panels = plt.subplots(2, figsize=(12, 8))

    # Both panels are built the same way; only the data differs.
    for frame, panel in zip((df1, df2), panels):
        panel.set_xscale("linear")
        sns.boxplot(x="Values", y="Moods", data=frame,
                    whis=[0, 100], width=.7, palette="vlag",
                    ax=panel)
        sns.stripplot(x="Values", y="Moods", data=frame,
                      size=4, color=".3", linewidth=0, ax=panel)
        panel.xaxis.grid(True)
        panel.set(ylabel="")
#Function Data Viz (BoxPlot)
def showBox(df):
    """Draw a single box plot (with strip overlay) of ``Values`` per mood.

    The whiskers span the full data range (percentiles 0 and 100). The axes
    are passed explicitly to every seaborn call, as in ``showAll`` and
    ``checkOrder``, instead of relying on pyplot's implicit current axes.

    Parameters:
        df: DataFrame with ``Moods`` and ``Values`` columns.
    """
    sns.set_theme(style="ticks")
    f, ax = plt.subplots(figsize=(12, 8))
    ax.set_xscale("linear")
    sns.boxplot(x="Values", y="Moods", data=df,
                whis=[0, 100], width=.7, palette="vlag", ax=ax)
    sns.stripplot(x="Values", y="Moods", data=df,
                  size=4, color=".3", linewidth=0, ax=ax)
    ax.xaxis.grid(True)
    ax.set(ylabel="")
    # Remove the left spine and trim the remaining ones to the tick range.
    sns.despine(ax=ax, trim=True, left=True)
#Function Data Viz (Line Plot)
def showLine(df):
    """Draw a line plot of ``Values`` against ``Second``, one line per mood.

    The axes are passed explicitly to seaborn, as in ``showAll``, instead of
    relying on pyplot's implicit current axes.

    Parameters:
        df: DataFrame with ``Moods`` and ``Values`` columns and an index
            named ``Second``.
    """
    sns.set_theme(style="ticks")
    f, ax = plt.subplots(figsize=(12, 8))
    sns.lineplot(x='Second', y='Values',
                 hue='Moods', data=df, ax=ax)
    ax.yaxis.grid(True)
# Per-participant slices of the cleaned session tables. They are consumed in
# pairs, one pair per session, by the checkOrder(...) calls further down.
# Variable names follow the pattern P<participant number><session letter>.
# NOTE(review): P14A and P15A select Participant15 and Participant16, unlike
# every other slice, where the name matches the queried participant - confirm
# whether the names or the queries are the intended ones.
P14A = SessionA.query('Participants == "Participant15"')
P15A = SessionA.query('Participants == "Participant16"')
P30B = SessionB.query('Participants == "Participant30"')
P11B = SessionB.query('Participants == "Participant11"')
P20C = SessionC.query('Participants == "Participant20"')
P18C = SessionC.query('Participants == "Participant18"')
P23D = SessionD.query('Participants == "Participant23"')
P25D = SessionD.query('Participants == "Participant25"')
P12E = SessionE.query('Participants == "Participant12"')
P19E = SessionE.query('Participants == "Participant19"')
P01H = SessionH.query('Participants == "Participant1"')
P30H = SessionH.query('Participants == "Participant30"')
P10I = SessionI.query('Participants == "Participant10"')
P07I = SessionI.query('Participants == "Participant7"')
P04J = SessionJ.query('Participants == "Participant4"')
P22J = SessionJ.query('Participants == "Participant22"')
P03M = SessionM.query('Participants == "Participant3"')
P29M = SessionM.query('Participants == "Participant29"')
P16O = SessionO.query('Participants == "Participant16"')
P29O = SessionO.query('Participants == "Participant29"')
P13P = SessionP.query('Participants == "Participant13"')
P02P = SessionP.query('Participants == "Participant2"')
P29Q = SessionQ.query('Participants == "Participant29"')
P08Q = SessionQ.query('Participants == "Participant8"')
P02R = SessionR.query('Participants == "Participant2"')
P13R = SessionR.query('Participants == "Participant13"')
P07T = SessionT.query('Participants == "Participant7"')
P01T = SessionT.query('Participants == "Participant1"')
# Data Visualisation
# * showAll graphs display all values for every participant in one session. The box plot overlaid with a strip (scatter) plot shows the spread of the data for each of the 4 moods.
# * The line plot shows how the values for each mood changed over time. The wider the band around a line, the more the values varied at that specific second.
# * checkOrder graphs are box plots for two participants of the same session: one who saw the commercial first (top) and one who saw the commercial last (bottom).
# Overview figure for every session, in alphabetical order.
for session_table in (
    SessionA, SessionB, SessionC, SessionD, SessionE,
    SessionF, SessionG, SessionH, SessionI, SessionJ,
    SessionK, SessionL, SessionM, SessionN, SessionO,
    SessionP, SessionQ, SessionR, SessionS, SessionT,
):
    showAll(session_table)

# Paired box plots: the first slice is drawn on top, the second below.
for top_slice, bottom_slice in (
    (P14A, P15A),
    (P30B, P11B),
    (P20C, P18C),
    (P23D, P25D),
    (P12E, P19E),
    (P01H, P30H),
    (P10I, P07I),
    (P04J, P22J),
    (P03M, P29M),
    (P16O, P29O),
    (P13P, P02P),
    (P29Q, P08Q),
    (P02R, P13R),
    (P07T, P01T),
):
    checkOrder(top_slice, bottom_slice)